# Metadata ----

metadata_legend <- data.table::fread("../data/metadata/metadata_legend.csv")

#' Read one metadata file and label every row with a risk category
#'
#' Adds a `Risk` column derived from the `Microcystin` column: "No" by
#' default, "Low" above 1, "Moderate" from 10 and "High" from 50. Rows whose
#' `Microcystin` comparison is `NA` keep the default "No", because `which()`
#' drops `NA` results.
#'
#' @param path Path of the metadata file handed to `data.table::fread()`.
#' @return The data.table read from `path`, with the `Risk` column added.
load_metadata_with_risk <- function(path) {
  metadata <- data.table::fread(path)
  metadata[, Risk := "No"]
  # Thresholds are applied in ascending order so that a higher category
  # overwrites a lower one on the same row.
  # NOTE(review): "Low" uses a strict `>` while the other two categories use
  # `>=` -- confirm that a value of exactly 1 is meant to stay "No".
  data.table::set(metadata, which(metadata$Microcystin > 1), "Risk", "Low")
  data.table::set(
    metadata, which(metadata$Microcystin >= 10), "Risk", "Moderate"
  )
  data.table::set(metadata, which(metadata$Microcystin >= 50), "Risk", "High")
  metadata
}

# One table per year, all labelled with the same rule.
metadata_2018 <- load_metadata_with_risk("../data/metadata/2018.csv")
metadata_2019 <- load_metadata_with_risk("../data/metadata/2019.csv")
metadata_2020 <- load_metadata_with_risk("../data/metadata/2020.csv")
# Read counts ----

# Build the count table from the finalized reads directory.
read_counts <- construct_ASVtable("../data/16S_processing/finalized_reads")

# Always keep the first column; keep any other column only when its column
# sum exceeds 5000.
read_counts <- read_counts[
  ,
  c(TRUE, colSums(read_counts[, -1]) > 5000),
  with = FALSE
]

# Strip everything from the first "_S" onwards in every column name.
# NOTE(review): the pattern is unanchored, so a name that contains "_S"
# earlier than the intended suffix is truncated there -- confirm that no
# column name does.
data.table::setnames(
  read_counts,
  old = colnames(read_counts),
  new = gsub("_S.*", "", colnames(read_counts))
)
# Taxonomy ----

# First column of read_counts; presumably the ASV sequences, since it is what
# gets handed to assignTaxonomy() -- TODO confirm.
asv_sequences <- read_counts[[1]]

# Each classifier receives the sequences directly and writes to its own
# variable, so the RDP assignment is no longer overwritten by the SILVA one.
classifications_rdp <- dada2::assignTaxonomy(
  asv_sequences,
  "../data/16S_processing/databases/rdp_train_set_18.fa.gz"
)

# `classifications` ends up holding the SILVA-based assignment, as before.
classifications <- dada2::assignTaxonomy(
  asv_sequences,
  "../data/16S_processing/databases/silva_nr99_v138.1_train_set.fa.gz"
)